#!/usr/bin/env python2
# -*- coding: utf-8 -*-

import os
import time
import json
import datetime
from sqlalchemy.exc import ResourceClosedError

from Ump import utils
from Ump.common import config
from Ump.common import exception
from Ump.common import log

from Ump.objs.session_wrapper import enable_log_and_session, enable_oplog
from Ump.objs.db import models
from Ump.objs.db.models import Host, ProtectionDomain
from Ump.objs.alert.api import AlertApi


from Ump.objs.manager_base import Manager
from Ump.objs.cluster.manager import ClusterManager
from Ump.objs.license.manager import LicenseManager
from Ump.objs.disk.manager import DiskManager 

from Ump.lich.shell import LichShell, LichShellParam
from Ump.lich.node import LichNode, LichNodeParam

from agentapi import host as host_api

from Ump.objs.session_wrapper import _sw

# Module-level singletons shared by the callbacks in this module.
cluster_manager = ClusterManager()
diskm = DiskManager()
alert_api = AlertApi()


LOG = log.get_log('Ump.objs.host.manager')


# Host / lichd lifecycle states persisted on the Host model.
STOPPED = 'stopped'
RUNNING = 'running'
STOPPING = 'stopping'
DELETING = 'deleting'


@models.add_model(models.Host)
class HostManager(Manager):

    def __init__(self):
        """Set up check-suspend configuration and collaborating managers."""
        super(HostManager, self).__init__()
        # Ports and toggle for the lichd suspended-node watchdog.
        self.admin_port = config.lich_check_suspend_admin_port
        self.agent_port = config.lich_check_suspend_agent_port
        self.is_check_suspend = config.is_check_suspend

        self.cluster_manager = ClusterManager()
        self.license_manager = LicenseManager()
        self.lichShell = LichShell()
        self.lichNode = LichNode()

    @enable_oplog(resource='node', event='add')
    def addnode_callback(self, _logger, rsp):
        """Task callback for the async "add node" command.

        On success, every added ip gets (or keeps) a Host row marked
        RUNNING and a rescan is scheduled; on failure, the provisional
        rows created by _create() are deleted again.
        """
        host_ips = json.loads(rsp.target)
        # rsp.hosts (optional) may extend the target list; de-duplicate.
        if hasattr(rsp, 'hosts') and rsp.hosts:
            host_ips = set(host_ips + json.loads(rsp.hosts))

        _logger.update_props(oplog_obj=','.join(host_ips))

        cluster = None
        if rsp.success:
            # Reuse the password of the host the command ran on when
            # distributing our ssh key to freshly created rows.
            host = _sw.db_host({'ip': rsp.host})
            password = host.passwd
            for ip in host_ips:
                host = _sw.db_host({'ip': ip})
                if not host:
                    values = {
                        'ip': ip,
                        'status': RUNNING,
                        'lichd_status': RUNNING,
                        # NOTE(review): hard-coded cluster id — confirm the
                        # single-cluster assumption holds here.
                        'cluster_id': 1,
                    }

                    host = models.Host(values).save()
                    res = self._ssh_key(ip, password)

                host.update({'status': RUNNING, 'lichd_status': RUNNING})

                # Best effort: a failed scan must not abort the callback.
                self.utils.exception_pass(self.async_scan_host, host)
                cluster = host.cluster
        else:
            # Roll back the provisional Host rows created before the task ran.
            for ip in host_ips:
                host = _sw.db_host({'ip': ip})
                if host:
                    host.delete()

        if cluster is not None:
            self.cluster_manager._sync_config(cluster, rsp.config)
            self.utils.exception_pass(self.license_manager.fusionnas_license_sync)

#            kwargs = {'_ss':'cluster_id': cluster.id, 'ssh_ip': host.ip}
#            self.utils.exception_pass(self.cluster_manager._license_sync, **kwargs)

    def addnode(self, kwargs):
        """Kick off the async add-node flow; returns the new host ids."""
        target_ips = kwargs['ips']

        created_ids = self._create(**kwargs)

        # Push any recovery-QoS settings out to the joining hosts.
        self.sync_recover_qos(target_ips)

        return created_ids

    def _create(self, ips, passwd, cluster_id=1, description=None, ip_pubs=None, is_force=False,
                is_arbitor=False, cls=None, **kwargs):
        '''Create provisional Host rows and post the async HostAddCmd.

        ips/passwd: parallel lists, or comma-separated strings.
        Returns the ids of the newly created Host rows; the real join
        result is applied later by addnode_callback().
        '''
        ips = utils.parse_str2list(ips)
        passwd = utils.parse_str2list(passwd)

        cluster = _sw.db_cluster(cluster_id)
        clusterhosts = cluster.hosts
        hosts_in_cluster = [host.ip for host in clusterhosts]
        self._check_input_ips(ips, hosts_in_cluster)

        if clusterhosts == []:
            # Bootstrapping an empty cluster: the first new ip is the ssh
            # entry point and must authenticate with its own password.
            host_ip = ips[0]
            password = passwd[0]
            self._check_authentication(host_ip, password=password)
        else:
            password = clusterhosts[0].passwd
            host_ip = self._select_host(cluster_id)

        hostnames_dict = self._get_hostnames_dict(ips, passwd)

        new_host_ids = []
        cluster_host_ips = [host.ip for host in clusterhosts]
        for index, ip in enumerate(ips):
            hostname = hostnames_dict.get(ip)

            values = {
                'ip': ip,
                'name': hostname,
                'hostname': hostname,
                'status': 'joining',
                'description': description,
                'is_join': True,
                'user': 'root',
                'cluster_id': cluster.id,
                'lichd_status': 'joining',
                'passwd': passwd[index],
            }

            # Best effort: a row that cannot be saved (closed session or
            # duplicate ip) is skipped — but logged, no longer silently.
            try:
                host = models.Host(values).save()
                new_host_ids.append(host.id)
                cluster.hosts.append(host)
            except ResourceClosedError as e:
                LOG.info('save host %s failed: %s' % (ip, e))
            except exception.ClusterDoubleIp as e:
                LOG.info('save host %s failed: %s' % (ip, e))

        cmd = host_api.HostAddCmd()
        cmd.addresses = ips
        cmd.clusterhosts = cluster_host_ips
        cmd.host = host_ip
        cmd.passwd = passwd
        cmd.hostnames_dict = hostnames_dict
        cmd.target = json.dumps(ips)
        cmd.target_id = -1
        cmd.timeout = 20 * 60

        self.taskm.async_post(cmd, callback=self.addnode_callback)

        return new_host_ids

    
    def async_scan_host(self, host, is_show=True):
        """Post an async HostSyncCmd for *host*; the result is applied
        by host_sync_callback()."""
        sync_cmd = host_api.HostSyncCmd()
        sync_cmd.target = host.ip
        sync_cmd.target_id = host.id
        sync_cmd.host = host.ip
        sync_cmd.is_show = is_show
        sync_cmd.is_sync_disk = True

        self.taskm.async_post(sync_cmd, callback=self.host_sync_callback)

    def _loop_distribute_protection_domain(self, hostids):
        """Assign a protection domain to every host id in *hostids*."""
        for hid in hostids:
            self._distribute_protection_domain(hid)

    def _check_input_ips(self, ips, hosts_in_cluster):
        """Reject duplicated ips and ips already present in the cluster.

        Raises Exception with a user-facing message on violation;
        returns True when the input is acceptable.
        """
        if len(set(ips)) != len(ips):
            raise Exception('集群内主机间IP不能重复，请输入不同的主机IP')

        joined = set(hosts_in_cluster)
        for ip in ips:
            if ip in joined:
                raise Exception('%s已经添加到集群，不可重复添加' % ip)
        return True

    def _get_hostname(self, host, password):
        """Return the remote `hostname` output of *host* (untrimmed);
        wraps any ssh failure in a user-facing error."""
        try:
            return self._exec_remote(host, 'hostname', password=password)
        except Exception:
            raise Exception("获取主机（%s）Hostname失败" % host)
 
    def rollback_delhosts(self, ips, hostnames_dict, is_nohosts, host_ip):
        """Best-effort removal of the /etc/hosts entries added for *ips*.

        Only runs when 'nohosts' mode is on; every deletion failure is
        logged and ignored so the rollback always completes.
        """
        if not is_nohosts == 'on':
            return

        for ip in ips:
            hostname = hostnames_dict.get(ip)
            etc_host_cmd = '%s/lich/admin/node.py --delhosts %s %s' % \
                (self.lich_home, ip, hostname)
            try:
                self._exec_remote(host_ip, etc_host_cmd)
            except Exception as e:
                # Was a bare `except: pass`; keep the best-effort
                # semantics but record the failure instead of hiding it.
                LOG.info('delhosts %s on %s failed: %s' % (ip, host_ip, e))

    def _check_etc_hosts(self, ips, etc_hosts, is_nohosts):
        """When not in 'nohosts' mode, every ip must already appear in
        the cluster's /etc/hosts content; raises otherwise."""
        skip_check = (is_nohosts == 'on')
        for ip in ips:
            if not skip_check and ip not in etc_hosts:
                raise Exception("主机IP（%s）与集群已配置的主机IP不符" % ip)
        return True

    def _get_hostnames_dict(self, ips, input_passwds):
        """Return {ip: hostname} for every ip, after verifying the
        password and distributing our ssh key to the host."""
        result = {}
        for index, ip in enumerate(ips):
            password = input_passwds[index]
            self._check_authentication(ip, password=password)
            self._ssh_key(ip, password)
            result[ip] = self._get_hostname(ip, password).strip()
        return result
    
    def host_delete(self, kwargs):
        """Post an async HostDeleteCmd and mark the host DELETING."""
        host = _sw.db_host(kwargs.get('host_id'))

        delete_cmd = host_api.HostDeleteCmd()
        delete_cmd.target = host.ip
        delete_cmd.target_id = host.id
        delete_cmd.host = host.ip
        delete_cmd.timeout = 24 * 60 * 60  # allow up to one full day

        self.taskm.async_post(delete_cmd, callback=self.delete_callback)

        host.update({'lichd_status': DELETING})

    @enable_oplog(resource='node', event='delete')
    def delete_callback(self, _logger, rsp):
        """Finalize an async delete: drop the row on success, otherwise
        restore the RUNNING state."""
        host = self._get_one(rsp.target_id)
        _logger.update_props(oplog_obj=host.ip)

        if not rsp.success:
            host.update({'lichd_status': RUNNING})
            return host

        host.delete()
        return host


    def _get_one(self, host_id):
        """Fetch one Host by id/spec; raises exception.NotFound when absent."""
        host = _sw.get_one(models.Host, id_or_spec=host_id)
        if host:
            return host
        raise exception.NotFound(host_id=host_id)

    @enable_log_and_session(resource='node', event='start_node')
    def start(self, _logger, kwargs):
        """Start lichd on one node and mark it RUNNING.

        An "already online" error from lich is treated as success; any
        other failure is re-raised with its original type and traceback
        (the old code wrapped it in a bare Exception, losing both).
        """
        host = self._get_one(kwargs.get('host_id'))

        _logger.update_props(oplog_obj=host.ip, user_id=kwargs.get('op_user_id'))

        try:
            param = LichShellParam(host.ip)
            self.lichNode.start(param)
        except Exception as e:
            # A node that is already online counts as started.
            if 'already online' not in str(e):
                raise

        host.update({'lichd_status': RUNNING})

    def stop(self, kwargs):
        """Post an async HostStopCmd and mark the host STOPPING."""
        host = _sw.db_host(kwargs.get('host_id'))

        stop_cmd = host_api.HostStopCmd()
        stop_cmd.target = host.ip
        stop_cmd.target_id = host.id
        stop_cmd.host = host.ip

        self.taskm.async_post(stop_cmd, callback=self.stop_callback)

        host.update({'lichd_status': STOPPING})

    @enable_oplog(resource='node', event='stop_node')
    def stop_callback(self, _logger, rsp):
        """Record the async stop result: STOPPED on success, otherwise
        back to RUNNING."""
        host = self._get_one(rsp.target_id)
        _logger.update_props(oplog_obj=host.ip)

        new_state = STOPPED if rsp.success else RUNNING
        host.update({'lichd_status': new_state})
        return host


    def _check_single_disk(self, disk):
        """Forbid removing the host's last healthy, unjoined data disk."""
        healthy = [d.device for d in disk.host.disks
                   if d.stat == 'OK' and not d.isjoin]
        if len(healthy) == 1 and disk.stat == 'OK':
            msg_warn = '移除磁盘失败，单数据磁盘的情况下不允许进行移除磁盘操作'
            LOG.warn(msg_warn, extra=self.get_extra)
            raise Exception(msg_warn)
        return True

    def _sync_memory(self, host):
        """Read /proc/meminfo on the node and persist it as a json blob."""
        raw = self.api_sync_call(host.ip, 'head -n 20 /proc/meminfo')
        parsed = utils._str2dict(raw)
        host.update({'mem_str': json.dumps(parsed)})
        return host
    
    def _sync_cpu(self, host):
        """Compute average cpu usage from two /proc/stat-style samples
        taken one second apart and persist it together with core count
        and total frequency.

        Returns the updated host, or 0 when no samples were obtained.
        """
        cpustats, cpu_cores_num, cpu_frequency = self.__read_cpu_usage(host)
        if not cpustats:
            return 0

        def _totals(sample):
            # fields[1:8] = user..softirq (all time), fields[1:4] = user/nice/system
            fields = sample.split()
            return (sum(float(v) for v in fields[1:8]),
                    sum(float(v) for v in fields[1:4]))

        usni1, usn1 = _totals(cpustats[0])
        usni2, usn2 = _totals(cpustats[1])

        rs = (usn2 - usn1) / (usni2 - usni1)
        values = {
            'cpu_util': "%.3f" % (rs),
            'cpu_cores_num': cpu_cores_num,
            'cpu_frequency': cpu_frequency,
        }
        host.update(values)
        return host

    def __read_cpu_usage(self, host):
        """Collect raw cpu data from the node.

        Returns (cpustats, cpu_cores_num, cpuMHz): two 'host_sync'
        samples taken one second apart, the processor count (string),
        and the sum of all reported core frequencies.
        """
        cmd = 'host_sync'
        cmd = cmd + ';sleep 1;' + cmd
        cpustat = self.api_sync_call(host.ip, cmd)
        cpustats = cpustat.strip().split('\n')

        cmd = 'cat /proc/cpuinfo |grep processor|wc -l'
        cpu_cores_num = self.api_sync_call(host.ip, cmd).strip()

        cmd = "cat /proc/cpuinfo |grep 'cpu MHz'"
        cpuMHz = self.api_sync_call(host.ip, cmd)
        # Only parse lines that actually contain a ':' separator. The old
        # filter `if x and x.split(':')` was always true for non-empty
        # lines, so a malformed line without ':' crashed float().
        cpuMHz_vals = [float(line.strip().split(':')[-1])
                       for line in cpuMHz.split('\n') if ':' in line]
        cpuMHz = sum(cpuMHz_vals)

        return cpustats, cpu_cores_num, cpuMHz

    def sync_recover_qos(self, hosts):
        """Copy the local recovery-QoS setting files to every host in
        *hosts*; returns [] when there is nothing to copy."""
        qos_dir = '/dev/shm/lich4/nodectl/recovery/'
        if not os.path.exists(qos_dir):
            return []

        entries = []
        for name in os.listdir(qos_dir):
            if not name.startswith('qos'):
                continue
            full = qos_dir + name
            with open(full, 'r') as fh:
                entries.extend((full, line.strip()) for line in fh)

        if not entries:
            return []

        for full, value in entries:
            for host in hosts:
                cmd = 'touch %s;echo %s > %s' % (full, value, full)
                LOG.info('sync %s to %s' % (full, host))
                self._exec_remote(host, cmd)

    def sync_hostname(self, host):
        """Refresh name/hostname from the node; returns '' on failure."""
        hostname = ''
        try:
            hostname = self.api_sync_call(host.ip, 'hostname').strip()
            host.update({'hostname': hostname, 'name': hostname})
        except Exception as e:
            # Best effort — an unreachable node keeps its old name.
            LOG.info('get hostname fail ：%s(%s)'%(host.ip, e))
        return hostname

    def _distribute_protection_domain(self, host_id):
        """Derive <site>.<zone>.<name> from the hostname and attach the
        host to the protection domain named after <site>.

        Missing components are padded with 'default'; hosts without a
        hostname are skipped.
        """
        host = Host.query.get(host_id)
        hostname = host.hostname
        if not hostname:
            return

        hostname = hostname.strip()
        dots = hostname.count('.')
        if dots == 0:
            hostname = "default.default.%s" % hostname
        elif dots == 1:
            hostname = "default.%s" % hostname

        site, zone, name = hostname.strip().split('.')

        pd = self.ensure_pd(site)
        host.update({'protection_domain_id': pd.id})
        return host


    @utils.retry(retry_number=10)
    def ensure_pd(self, site):
        '''Get-or-create the ProtectionDomain named *site*.

        Retried because a concurrent create may make the insert fail as
        a duplicate; the retry then finds the existing row.
        '''
        existing = ProtectionDomain.query.filter_by(name=site).first()
        if existing:
            return existing
        return ProtectionDomain({'name': site}).save()


    def host_sync_callback(self, rsp):
        """Entry point for an async HostSyncCmd result; failed commands
        are ignored."""
        if not rsp.success:
            return

        host = Host.query.filter_by(id=rsp.target_id).first()
        if not host:
            raise exception.NotFound(ip=rsp.target)
        return self._host_sync_callback(rsp, host)

    def _host_sync_callback(self, rsp, host):
        """Apply a HostSyncCmd response to the Host row.

        Updates cpu/memory stats and their 10-minute history, hostname,
        lichd state (unless a stop is in flight), then the disks.
        """
        cpu_info = rsp.cpu_info
        mem_info = rsp.mem_info

        # cpu/mem info must be persisted first: the interval helpers read
        # the freshly updated columns back off the host row.
        host.update(cpu_info)
        cpu_interval = self._update_cpu_interval(host)

        host.update(mem_info)
        mem_interval = self._update_mem_interval(host)

        hostname = rsp.hostname
        nodestat = rsp.nodestat
        uss_host_stat = rsp.uss_host_stat

        values = {'hostname': hostname, 'name': hostname}

        if rsp.nodestat:
            nodestat = json.loads(nodestat)[0]

            # and/or idiom: RUNNING when the 'running' key is truthy,
            # STOPPED otherwise.
            lichd_stat = nodestat.get(RUNNING, '') and RUNNING or STOPPED

            lichd_stat = lichd_stat.strip()
            values['identity'] = nodestat.get('status')
            #lich_disks, isSucc = diskm._sync_disk(host)

            # Do not overwrite the state while an async stop is pending.
            if host.lichd_status not in [STOPPING]:
                values['lichd_status'] = lichd_stat

        values.update({
            'uss_status': uss_host_stat,
            'status': RUNNING,
            'name': hostname.strip(),
            'hostname': hostname.strip(),
            'cpu_interval': cpu_interval,
            'mem_interval': mem_interval,
        })

        host.update(values)
        # Best effort: protection-domain assignment must not break a sync.
        utils.exception_pass(self._distribute_protection_domain, host.id)

        diskm._sync_disk_callback(host, rsp)
        return host

    def _update_cpu_interval(self, host):
        """Append the current cpu_util sample and return the rolling
        10-minute window as a json string."""
        samples = json.loads(host.cpu_interval) if host.cpu_interval else []
        samples.append((time.time(), float(host.cpu_util)))
        # Drop samples older than 10 minutes.
        while time.time() - samples[0][0] > 600:
            samples.pop(0)
        return json.dumps(samples)

    def _update_mem_interval(self, host):
        """Append the current memory-usage sample and return the rolling
        10-minute window as a json string."""
        samples = json.loads(host.mem_interval) if host.mem_interval else []
        samples.append((time.time(), host.current_usage_mem))
        # Drop samples older than 10 minutes.
        while time.time() - samples[0][0] > 600:
            samples.pop(0)
        return json.dumps(samples)


    def host_sync(self, host_id):
        """Schedule a quiet (is_show=False) status sync for one host."""
        target = Host.query.filter_by(id=host_id).first()
        self.async_scan_host(target, is_show=False)

    
    def _node_sync_check(self, host_ip):
        """Run the periodic per-node maintenance bundle over ssh.

        A single remote invocation (sections separated by a sentinel
        echo): key sync, license listing and — when check-suspend is
        enabled — the netblock listing plus (re)start of the suspend
        agent.

        Returns (is_update_out, is_invalid, netblock, is_start_ok) as
        raw command-output strings; empty when a section did not run.
        """
        sep_sign = "======"
        suspend_agent = "%s/lich/admin/lich_check_suspend_agent.py" % (self.lich_home)
        sep_sign_cmd = "echo %s;" % sep_sign
        cmd = "%s/lich/admin/syncump.py is_update_key;" % (self.lich_home)
        cmd += sep_sign_cmd + "%s/lich/libexec/lich.license -m list;" % (self.lich_home)

        if self.is_check_suspend:
            cmd += (sep_sign_cmd + "ls /dev/shm/lich/blocklist/;"
                + sep_sign_cmd
                + "python %s --start --admin_host %s --admin_port %s --agent_port %s;" %\
                         (suspend_agent, self.admin_host, self.admin_port, self.agent_port)
                + sep_sign_cmd)
        cmd += "echo 'command exec success';"

        res  = self._exec_remote(host_ip, cmd, iscode=True)
        # Split the combined stdout back into per-command sections.
        multi_stdout = res.split(sep_sign)
        is_update_out = is_invalid = netblock = is_start_ok = ''
        if len(multi_stdout) > 1:
            is_update_out = multi_stdout[0].strip()
            is_invalid = multi_stdout[1].strip()
            if self.is_check_suspend:
                netblock = multi_stdout[2].strip()

        if self.is_check_suspend:
            # Verify the agent owns the expected port: the pid bound to
            # agent_port must equal the pid of the running agent script.
            agent_cmd = '''bash -c "if [ \'`netstat -ntpl|grep :%s | awk '{print $7}' |awk -F "/" '{print $1}'`\' != '' ] \
			&& [ \'`ps aux|grep %s/lich/admin/lich_check_suspend_agent.py|grep -v grep |awk '{ print $2 }'`\' == \
			\'`netstat -ntpl|grep :%s | awk '{print $7}' |awk -F "/" '{print $1}'`\' ];then echo 'start success';fi"''' % (self.agent_port, self.lich_home, self.agent_port )
            is_start_ok  = self._exec_remote(host_ip, agent_cmd, iscode=True)
        # NOTE(review): is_agent_start_ok is computed but never used —
        # the raw is_start_ok is what gets returned; looks like dead code.
        is_agent_start_ok = is_start_ok.strip()
        return is_update_out, is_invalid, netblock , is_start_ok

    def _node_sync_check_invalid(self, is_invalid):
        """True when the license-listing output contains any known
        invalid-license marker."""
        invalid_status = ['No license found', 'Node offline', 'License expired', 'Invalid license', 'Excess capacity']
        return any(marker in is_invalid for marker in invalid_status)

    def _pull_check_suspend_admin(self, cluster):
        """Keep the local lichtools/admin copy in step with the cluster's
        lichd build: when the recorded version differs, pull the admin
        scripts from a cluster node and record the new version.
        """
        lichtools = 'lichtools/admin'
        lichtoolsDir = os.path.join(self.ump_home, lichtools)
        versionFile = os.path.join(lichtoolsDir, "version")
        recode, localVerion, stderr  = utils._exec_pipe("cat %s" % versionFile, retry=0)

        lichAdminDir = "%s/lich/admin" % self.lich_home
        isSameVersion = True
        for host in cluster.hosts:
            versionCmd = "%s/lich/sbin/lichd -v|grep 'BuildId' |awk '{print $2}'" % self.lich_home
            lichVersion = self.api_sync_call(host.ip, versionCmd)

            if localVerion.strip() != lichVersion.strip():
                isSameVersion = False

            # First host already matching -> nothing to pull.
            # NOTE(review): once a mismatch is seen, isSameVersion stays
            # False, so every later host falls through to the pull even
            # if its own version matches — confirm this is intended.
            if isSameVersion:
                break

            utils.make_dir(lichtoolsDir)
            try:
                self._dir_get_remote(host.ip, lichtoolsDir, lichAdminDir)
                recode, stdout, stderr  = utils._exec_pipe("echo '%s' > %s" % (lichVersion, versionFile), retry=0)
                break
            except Exception,e:
                # Best effort: try the next host on failure.
                LOG.info(e)
                pass

    def _select_storage_network(self, cluster, args):
        """Pick the manager-side ip that can reach the storage network.

        args: tuple like ('172.16.175.1,...', 'pc', server_ip); falls
        back to server_ip when no candidate ip reaches a cluster host.
        Side effect: stores the choice in self.admin_host.
        """
        candidates = args[0].split(',')
        self.admin_host = args[2]

        for host in cluster.hosts:
            for candidate in candidates:
                if utils.check_network(host.ip, candidate):
                    # Last reachable candidate wins (matches old behavior).
                    self.admin_host = candidate
        return self.admin_host

    def _start_check_suspend_admin(self, cluster):
        """Start the local check-suspend admin process and report whether
        it owns admin_port; returns 'ok' when the feature is disabled.
        """
        if not self.is_check_suspend:
            return 'ok'
        # Make sure the local admin scripts match the cluster version first.
        self._pull_check_suspend_admin(cluster)
        cmd = "python %s/lichtools/admin/lich_check_suspend_admin.py --start --admin_port %s;" % (self.ump_home, self.admin_port)
        recode, stdout,stderr  = utils._exec_pipe(cmd, retry=0)

        # Started ok when the pid bound to admin_port equals the pid of
        # the running admin script.
        cmd = '''bash -c "if [ \'`netstat -ntpl|grep :%s | awk '{print $7}' |awk -F '/' '{print $1}'`\' != '' ] &&\
                 [  \'`ps aux|grep lichtools/admin/lich_check_suspend_admin.py|grep -v grep |awk '{ print $2 }'`\'\
                 == \'`lsof -i:%s |grep -v PID |awk '{ print $2 }'`\' ];then echo 'start success';fi"''' % (self.admin_port, self.admin_port)
        recode, is_start_ok, stderr  = utils._exec_pipe(cmd, retry=0)
        return is_start_ok

    def _get_extra(self, cluster):
        """Return the cluster's extra field decoded from json ({} when unset)."""
        if cluster.extra is None:
            return dict()
        return json.loads(cluster.extra)

    def _check_no_admin_ip(self, cluster):
        """Raise a deduplicated warning alert when no manager ip shares a
        network with the cluster (suspend monitoring unusable)."""
        if self.admin_host or not config.is_check_suspend:
            return

        spec = dict(returncode='9002', is_handled=False)
        if not _sw.get_one(models.Alert, id_or_spec=spec):
            detail = "管理系统没有和集群同网段的IP，监控假死节点功能不可用"
            values = {'detail':detail, 'position':None,'level': 'WARNING', 'returncode':'9002'}
            models.Alert(values).save()
            self.send_message('reload_alert')
    
    def _check_netblock(self, netblock):
        """Raise a deduplicated error alert when suspended ('netblock')
        nodes were reported; returns truthy when the condition holds."""
        blocked = netblock.strip()
        triggered = blocked and config.is_check_suspend
        if triggered:
            spec = dict(returncode='9003', is_handled=False)
            if not _sw.get_one(models.Alert, id_or_spec=spec):
                detail = '发现假死节点（%s），请联系相关专业人员处理' % (blocked)
                values = {'detail':detail, 'position':None,'level': 'ERROR', 'returncode':'9003'}
                models.Alert(values).save()
                self.send_message('reload_alert')

        return triggered

    def _suspend_process_start_fail(self, is_start_ok):
        """Raise a deduplicated warning alert when the suspend-monitor
        agent failed to start; returns truthy when that is the case."""
        failed = not is_start_ok.strip() and config.is_check_suspend
        if failed:
            spec = dict(returncode='9004', is_handled=False)
            if not _sw.get_one(models.Alert, id_or_spec=spec):
                detail = "监控假死节点进程启动失败，监控假死节点功能不可用"
                values = {'detail':detail, 'position':None,'level': 'WARNING', 'returncode':'9004'}
                models.Alert(values).save()
                self.send_message('reload_alert')

        return failed
        
    def node_sync(self):
        """Ping every cluster host over ssh and update its liveness.

        Each host gets up to 5 `hostname` attempts; on the first success
        it is marked RUNNING. Only when ALL attempts fail is the host
        marked shutdown and an alert raised.

        Bug fix: the old code flagged a host as shutdown whenever the
        attempt counter reached 5, even if the 5th attempt succeeded.
        """
        cluster = _sw.db_cluster(1)
        ips = utils.get_manager_ip()
        self._select_storage_network(cluster, ips)

        for host in cluster.hosts:
            if host.is_host_deleting or host.is_deleting:
                continue

            reachable = False
            for _attempt in range(5):
                try:
                    self._exec_remote(host.ip, 'hostname')
                except Exception:
                    continue  # best effort: try again
                values = {
                    'status': RUNNING,
                    'is_send': False,
                    'lichd_status': RUNNING,
                    # Push the "node down" timestamp far enough into the
                    # past that down-time alerting is effectively reset.
                    'node_down_at': datetime.datetime.today() - datetime.timedelta(seconds=4800)
                }
                host.update(values)
                reachable = True
                break

            if not reachable:
                host.update({'status':'shutdown','is_send':True,'lichd_status':STOPPED})
                detail = "节点%s状态异常，检查节点存储网络是否断开或节点宕机" % (host.ip)
                values = {'detail': detail, 'host_id':host.id, 'level': 'WARNING', 'returncode': '9014'}
                alert_api.alert_create(values, filter_host_id=host.id)
        


if __name__ == '__main__':
    # Manual smoke test: trigger an async status sync for host id 3.
    hom = HostManager()
#    kwargs = {u'username': u'admin', u'name': u'', u'ips': [u'192.168.120.75'], u'passwd': [u'mdsmds'], u'op_user_id': 1, u'is_force': u'false', u'user': u'root'}
    #hom.host_sync(1)
    hom.host_sync(3)
